#--------------------------------------------------------------------------------------
#
# refchem.R - code to prepare experimental / literature ER data
#
# July 2014
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov, 919-541-3085
#
#
#--------------------------------------------------------------------------------------
library(grDevices)
library(RColorBrewer)
library(stringr)
#library(class)
#library(kohonen)
source("utils.R")
#--------------------------------------------------------------------------------------
#
# read all
#
#--------------------------------------------------------------------------------------
read.all <- function(mode="step.1",nrows=-1) {
	# Build the cleaned literature table "ER_literature.txt" from the raw export
	# "ALL_exp_data_filtered.txt" (both relative to the working directory).
	#
	# The work is done in stages. Each stage caches its result in a global
	# variable (ALL.DATA, ALL.DATA.2, ALL.DATA.4, ALL.DATA.5) and is skipped when
	# that variable already exists, so remove the globals to force a recompute.
	#
	# mode:  kept for backward compatibility; not used by this function
	# nrows: passed to read.table; -1 reads the whole file
	#
	# Returns (invisibly) the data frame written to ER_literature.txt.

	# Stage 1: read the raw tab-delimited file
	if(!exists("ALL.DATA")) {
		filename <- "ALL_exp_data_filtered.txt"
		temp <- read.table(filename,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"",comment.char="",nrows=nrows)
		ALL.DATA <<- temp
	}

	# Stage 2: expand column 1 into one (CAS string, source row number) pair per
	# identifier. Comma-separated cells are split, and pieces of five characters
	# or fewer are dropped.
	if(!exists("ALL.DATA.2")) {
		casstrings <- ALL.DATA[,1]
		pieces <- lapply(seq_along(casstrings),function(i) {
			casstring <- casstrings[i]
			if(str_detect(casstring,",")) {
				stemp <- str_replace_all(casstring,","," ")
				stemp <- str_replace_all(stemp,"  "," ")
				tokens <- str_split(stemp," ")[[1]]
				tokens <- str_trim(tokens[str_length(tokens)>5])
			}
			else {
				# Trim the new value itself. The previous code trimmed the
				# accumulated matrix, which does not reliably keep its
				# dimensions under stringi-backed stringr and corrupted the
				# result.
				tokens <- str_trim(casstring)
			}
			matrix(c(tokens,rep(i,length(tokens))),ncol=2)
		})
		# A single rbind avoids re-copying the matrix on every row. The empty
		# seed matrix keeps the result two columns wide when there are no rows.
		temp2 <- do.call(rbind,c(list(matrix(character(0),ncol=2)),pieces))
		ALL.DATA.2 <<- temp2
	}

	# Stage 3: strip leading zeros from the CAS strings and drop duplicate pairs
	if(!exists("ALL.DATA.4")) {
		temp2 <- ALL.DATA.2
		cat("Dimension of temp2:",dim(temp2),"\n")
		temp3 <- temp2
		temp3[,1] <- sub("^0+","",temp2[,1])
		temp4 <- unique(temp3)
		ALL.DATA.4 <<- temp4
	}

	# Stage 4: pull the source row for every pair, overwrite CASRN with the
	# cleaned identifier, and prepend a CODE column copied from column 1
	if(!exists("ALL.DATA.5")) {
		temp4 <- ALL.DATA.4
		# The row numbers were stored as text in the character matrix
		irows <- as.integer(temp4[,2])
		temp5 <- ALL.DATA[irows,,drop=FALSE]
		rownames(temp5) <- NULL
		temp5[,"CASRN"] <- temp4[,1]
		temp5 <- cbind(temp5[,1],temp5)
		names(temp5)[1] <- "CODE"
		ALL.DATA.5 <<- temp5
	}

	# Final step: CODE becomes "C" followed by the value with hyphens removed
	temp5 <- ALL.DATA.5
	temp5[,1] <- str_replace_all(paste0("C",temp5[,1]),"-","")
	fname <- "ER_literature.txt"
	write.table(temp5,fname,sep="\t",row.names=FALSE)

	# Keep the inspection breakpoint for interactive sessions only
	if(interactive()) browser()
	invisible(temp5)
}
#--------------------------------------------------------------------------------------
#
# Scatterplot every pair of endpoint types across the chemicals they share
#
#--------------------------------------------------------------------------------------
plot.AxB <- function(to.file=FALSE) {
	# Draw one scatterplot for every pair of endpoint types (ENDPOINT_NAME) in
	# "ER_literature.txt" that have more than four chemicals (CODE) in common.
	# For a shared chemical, every value of the first endpoint is paired with
	# every value of the second. Panels are laid out 3 x 2 per page.
	#
	# to.file: TRUE writes the panels to "AxB.pdf"; FALSE draws on the current
	#          device and stops in browser() after each panel
	#
	# Returns (invisibly) the number of panels drawn.

	# Read the data before opening any device, so a missing input file does not
	# leave a stray PDF behind
	fname <- "ER_literature.txt"
	DATA <- read.table(fname,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"")
	var.list <- sort(unique(DATA[,"ENDPOINT_NAME"]))
	#var.list <- var.list[!is.element(var.list,c("INH","Agonism","Antagonism"))]
	nvar <- length(var.list)

	if(to.file) {
		pdf(file="AxB.pdf",width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
		# Close the device even when a later step fails
		on.exit(dev.off(),add=TRUE)
		par(mfrow=c(3,2),mar=c(4,4,4,1))
	}
	else {
		old.par <- par(mfrow=c(3,2),mar=c(4,4,4,1))
		on.exit(par(old.par),add=TRUE)
	}

	# Endpoint types drawn on a logarithmic axis
	log.vars <- c("EC50","GI50","IC30","IC50","REC10","Ki","Ka","Kd")

	# Axis limits from the usable values: missing values and values at or above
	# 1000000 are ignored, and a log axis also ignores non-positive values.
	# Returns NULL when nothing is left.
	axis.limits <- function(v,is.log) {
		keep <- !is.na(v) & v<1000000
		if(is.log) keep <- keep & v>0
		if(!any(keep)) return(NULL)
		range(v[keep])
	}

	nplots <- 0L
	# seq_len() keeps the loop empty when fewer than two endpoint types exist
	for(i in seq_len(max(nvar-1L,0L))) {
		Avar <- var.list[i]
		Atemp <- DATA[DATA[,"ENDPOINT_NAME"] %in% Avar,]
		Achems <- unique(Atemp[,"CODE"])

		for(j in (i+1L):nvar) {
			Bvar <- var.list[j]
			Btemp <- DATA[DATA[,"ENDPOINT_NAME"] %in% Bvar,]
			Bchems <- unique(Btemp[,"CODE"])

			Both.chems <- Achems[Achems %in% Bchems]
			if(length(Both.chems)<=4) next

			# All A-by-B value pairs per shared chemical, B varying fastest
			x.parts <- vector("list",length(Both.chems))
			y.parts <- vector("list",length(Both.chems))
			for(k in seq_along(Both.chems)) {
				code <- Both.chems[k]
				Avals <- Atemp[Atemp[,"CODE"] %in% code,"ENDPOINT_VALUE"]
				Bvals <- Btemp[Btemp[,"CODE"] %in% code,"ENDPOINT_VALUE"]
				x.parts[[k]] <- rep(Avals,each=length(Bvals))
				y.parts[[k]] <- rep(Bvals,times=length(Avals))
			}
			x <- unlist(x.parts)
			y <- unlist(y.parts)

			xlog <- Avar %in% log.vars
			ylog <- Bvar %in% log.vars
			xlim <- axis.limits(x,xlog)
			ylim <- axis.limits(y,ylog)
			if(is.null(xlim) || is.null(ylim)) {
				warning("plot.AxB: no plottable values for ",Avar," : ",Bvar,"; panel skipped",call.=FALSE)
				next
			}
			slog <- paste0(if(xlog) "x" else "",if(ylog) "y" else "")

			plot(y~x,xlab=Avar,ylab=Bvar,cex.lab=1.2,cex.axis=1.2,main=paste(Avar,":",Bvar),xlim=xlim,ylim=ylim,log=slog)
			nplots <- nplots+1L
			if(!to.file) browser()
		}
	}
	invisible(nplots)
}
######################################################################################
######################################################################################
######################################################################################
######################################################################################
######################################################################################
